R Markdown
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2 √ purrr 0.3.4
## √ tibble 3.0.3 √ dplyr 1.0.2
## √ tidyr 1.1.2 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gganimate)
## Warning: package 'gganimate' was built under R version 4.0.3
library(gifski)
## Warning: package 'gifski' was built under R version 4.0.3
# Load the cleaned data set; every summary in this script is derived from it.
data_cleaned <- read.csv("../data/cleaned.csv")

# Overall attendance rate per year: rows whose status is exactly "Yes" divided
# by all rows of that year. A missing status is never counted as "Yes" but
# still counts in the denominator.
rate_year <- summarize(
  group_by(data_cleaned, year),
  rate = sum(college_attendance_status == "Yes", na.rm = TRUE) / n()
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Animated line chart of the overall college attendance rate by year.
pyear <- ggplot(rate_year, aes(x = year, y = rate)) +
  geom_line() +
  geom_point() +
  labs(title = "Trend Over Time", x = "Year", y = "College Attendance Rate") +
  scale_x_continuous(breaks = seq(2008, 2018, 2)) +
  # Draw the series progressively along the year axis.
  transition_reveal(year)

# Keep the rendered animation in a variable so the file written below is
# explicitly this animation rather than whatever last_animation() holds.
anim_year <- animate(
  pyear,
  duration = 5, fps = 20, width = 400, height = 400,
  renderer = gifski_renderer()
)
anim_year  # print the animation, as the bare animate() call did

anim_save("rate_year.gif", animation = anim_year, path = "../plot")
# Attendance rate for every (year, metropolitan_status) combination: rows whose
# status is exactly "Yes" divided by all rows in that combination.
rate_city <- summarize(
  group_by(data_cleaned, year, metropolitan_status),
  rate = sum(college_attendance_status == "Yes", na.rm = TRUE) / n()
)
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# pivot_wider(names_from =metropolitan_status, values_from = rate)
# Animated line chart of the attendance rate by year, one coloured line per
# metropolitan status.
pcity <- ggplot(rate_city, aes(x = year, y = rate, color = metropolitan_status)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(2008, 2018, 2)) +
  labs(
    title = "Trend Over Time by Metropolitan",
    x = "Year",
    y = "College Attendance Rate"
  ) +
  # Draw the series progressively along the year axis.
  transition_reveal(year)

# Keep the rendered animation in a variable so the file written below is
# explicitly this animation rather than whatever last_animation() holds.
anim_city <- animate(
  pcity,
  duration = 5, fps = 20, width = 600, height = 400,
  renderer = gifski_renderer()
)
anim_city  # print the animation, as the bare animate() call did

anim_save("ratebyCity.gif", animation = anim_city, path = "../plot")
# plot_ly(rate_city, x=~year, y=~`in metropolitan and central city`, name = "in metropolitan and central city", type = "scatter", mode = "line") %>%
# add_trace(y=~`in metropolitian but mixed of central city`, name = "in metropolitian but mixed of central city", type = "scatter", mode = "line") %>%
# add_trace(y=~`in metropolitian but not in central city`, name = "in metropolitian but not in central city", type = "scatter", mode = "line") %>%
# add_trace(y=~`mixed of metropolitian status`, name = "mixed of metropolitian status", type = "scatter", mode = "line") %>%
# add_trace(y=~`Not in metropolitan area`, name = "Not in metropolitan area", type = "scatter", mode = "line")# plot_ly(rate_city, x=~year, y=~`in metropolitan and central city`, name = "in metropolitan and central city", type = "scatter", mode = "line") %>%
# add_trace(y=~`in metropolitian but mixed of central city`, name = "in metropolitian but mixed of central city", type = "scatter", mode = "line") %>%
# add_trace(y=~`in metropolitian but not in central city`, name = "in metropolitian but not in central city", type = "scatter", mode = "line") %>%
# add_trace(y=~`mixed of metropolitian status`, name = "mixed of metropolitian status", type = "scatter", mode = "line") %>%
# add_trace(y=~`Not in metropolitan area`, name = "Not in metropolitan area", type = "scatter", mode = "line")
# Attendance rate per (year, state), plus each state's rank within its year
# (rank 1 = highest rate; ties share the smallest rank).
rate_state <- data_cleaned %>%
  group_by(year, states) %>%
  summarize(
    rate = sum(college_attendance_status == "Yes", na.rm = TRUE) / n()
  ) %>%
  # summarize() peels off only the `states` level, so the result is still
  # grouped by year and the ranking below is computed separately per year.
  mutate(rank = min_rank(desc(rate))) %>%
  ungroup()
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# pivot_wider(names_from =metropolitan_status, values_from = rate)
# Animated horizontal bar chart: one bar per state, positioned by its
# within-year rank of college attendance rate, stepping through the years.
pstate <- ggplot(
  rate_state,
  aes(rank, group = states, fill = as.factor(states), color = as.factor(states))
) +
  # Each bar is a tile centred at rate / 2 with height rate, i.e. it spans
  # from 0 to rate.
  geom_tile(aes(y = rate / 2, height = rate, width = 0.9), alpha = 0.8, color = NA) +
  # State name to the left of the bar.
  geom_text(aes(y = 0, label = paste(states, ' ')), vjust = 0.2, hjust = 1) +
  # Value at the end of the bar; rounded because the raw ratio would print
  # with many decimals.
  geom_text(aes(y = rate, label = paste(' ', round(rate, 3))), hjust = 0) +
  coord_flip(clip = 'off', expand = TRUE) +
  scale_y_continuous(labels = scales::comma) +
  scale_x_reverse() +
  # "none" replaces the deprecated `FALSE` for switching a legend off.
  guides(color = "none", fill = "none") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 25, hjust = 0.5, face = 'bold', colour = 'grey', vjust = -1),
    plot.subtitle = element_text(size = 18, hjust = 0.5, face = 'italic', color = 'grey'),
    plot.caption = element_text(size = 8, hjust = 0.5, face = 'italic', color = 'grey'),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    plot.margin = margin(1, 1, 1, 4, 'cm')
  )

# One animation state per year; {closest_state} is filled in with the year
# being shown. The value-axis label previously read 'Total Suicides per year',
# which did not describe the plotted quantity.
plt <- pstate +
  transition_states(states = year, transition_length = 4, state_length = 1) +
  ease_aes('cubic-in-out') +
  labs(
    title = 'College Attendance Rate per Year : {closest_state}',
    x = '',
    y = 'College Attendance Rate'
  )

# NOTE(review): nframes, fps and duration are all supplied here; gganimate
# documents that any two of them determine the third, so one of these values is
# presumably ignored -- confirm which and drop it.
final_animation <- animate(
  plt,
  nframes = 100, fps = 20, duration = 30,
  width = 950, height = 750,
  renderer = gifski_renderer()
)
# Save the animation rendered above explicitly instead of relying on
# last_animation().
anim_save("ratebyState.gif", animation = final_animation, path = "../plot")
#
# pstate = ggplot(rate_state, aes(x=rank, y=rate, color = states)) +
# geom_tile(aes(y = rank, height = rate, width = .9, color = NA))+
# geom_text(aes(y = 0, label = paste(states, ' ')), vjust = 0.2, hjust = 1) +
# geom_text(aes(y=rate,label = paste(' ',rate)), hjust=0)+
# coord_flip(clip = 'off', expand = TRUE) +
# scale_y_continuous(labels = scales::comma) +
# scale_x_reverse() +
# guides(color = FALSE, fill = FALSE) +
# labs(title = "College Attendance Rate by State", y="Year", x= "College Attendence Rate")
# transition_reveal(year)
#
# pstate
# Copy of the data with every row that contains at least one NA removed;
# the income, number-of-children and school-type summaries all start from it.
data_omitna <- na.omit(data_cleaned)

# Attendance rate per (year, income band). Bands come from fixed cut-offs on
# anually_family_income: up to 20000 is 'low', 94500 and above is 'high',
# everything else is 'middle'.
# Earlier median-based alternative, kept for reference:
# mutate(income = car::recode(`anually_family_income`, "lo:median(data_omitna$anually_family_income)*0.3 = 'low';median(data_omitna$anually_family_income)*2:hi='high';else = 'middle'")) %>%
rate_income <- data_omitna %>%
  mutate(
    income = car::recode(
      anually_family_income,
      "lo:20000 = 'low';94500:hi='high';else = 'middle'"
    )
  ) %>%
  group_by(year, income) %>%
  summarize(
    rate = sum(college_attendance_status == "Yes", na.rm = TRUE) / n()
  )
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# pivot_wider(names_from =income, values_from = rate)
# Animated line chart of the attendance rate by year, one coloured line per
# income band.
pincome <- ggplot(rate_income, aes(x = year, y = rate, color = income)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(2008, 2018, 2)) +
  labs(
    title = "Trend Over Time by Income",
    x = "Year",
    y = "College Attendance Rate"
  ) +
  # Draw the series progressively along the year axis.
  transition_reveal(year)

# Keep the rendered animation in a variable so the file written below is
# explicitly this animation rather than whatever last_animation() holds.
anim_income <- animate(
  pincome,
  duration = 5, fps = 20, width = 600, height = 400,
  renderer = gifski_renderer()
)
anim_income  # print the animation, as the bare animate() call did

anim_save("ratebyIncome.gif", animation = anim_income, path = "../plot")
# plot_ly(rate_income, x=~year, y=~`high`, name = "high income", type = "scatter", mode = "line") %>%
# add_trace(y=~`middle`, name = "middle income", type = "scatter", mode = "line") %>%
# add_trace(y=~`low`, name = "low income", type = "scatter", mode = "line")
# Attendance rate for every (year, race) combination: rows whose status is
# exactly "Yes" divided by all rows in that combination.
rate_race <- summarize(
  group_by(data_cleaned, year, race),
  rate = sum(college_attendance_status == "Yes", na.rm = TRUE) / n()
)
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# pivot_wider(names_from =race, values_from = rate)
# Animated line chart of the attendance rate by year, one coloured line per
# race category.
prace <- ggplot(rate_race, aes(x = year, y = rate, color = race)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(2008, 2018, 2)) +
  labs(
    title = "Trend Over Time by Race",
    x = "Year",
    y = "College Attendance Rate"
  ) +
  # Draw the series progressively along the year axis.
  transition_reveal(year)

# Keep the rendered animation in a variable so the file written below is
# explicitly this animation rather than whatever last_animation() holds.
anim_race <- animate(
  prace,
  duration = 5, fps = 20, width = 600, height = 400,
  renderer = gifski_renderer()
)
anim_race  # print the animation, as the bare animate() call did

anim_save("ratebyRace.gif", animation = anim_race, path = "../plot")
# plot_ly(rate_race, x=~year, y=~`White`, name = "White", type = "scatter", mode = "line") %>%
# add_trace(y=~`African American`, name = "African American", type = "scatter", mode = "line") %>%
# add_trace(y=~`American Indian or Alaska Native`, name = "American Indian or Alaska Native", type = "scatter", mode = "line") %>%
# add_trace(y=~`Other Asian or Pacific Islander`, name = "Other Asian or Pacific Islander", type = "scatter", mode = "line") %>%
# add_trace(y=~`Chinese`, name = "Chinese", type = "scatter", mode = "line") %>%
# add_trace(y=~`Japanese`, name = "Japanese", type = "scatter", mode = "line") %>%
# add_trace(y=~`Others`, name = "others", type = "scatter", mode = "line")
# Attendance rate per (year, number-of-children band), computed on the
# NA-free data. number_of_children is collapsed into '0', '1-2' and '3+'.
rate_noc <- data_omitna %>%
  mutate(
    noc = car::recode(
      number_of_children,
      "0 = '0'; 1:2 = '1-2';3:hi='3+'"
    )
  ) %>%
  group_by(year, noc) %>%
  summarize(
    rate = sum(college_attendance_status == "Yes", na.rm = TRUE) / n()
  )
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
# pivot_wider(names_from =noc, values_from = rate)
# Animated line chart of the attendance rate by year, one coloured line per
# number-of-children band.
pnoc <- ggplot(rate_noc, aes(x = year, y = rate, color = noc)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(2008, 2018, 2)) +
  labs(
    title = "Trend Over Time by Number of Children",
    x = "Year",
    y = "College Attendance Rate"
  ) +
  # Draw the series progressively along the year axis.
  transition_reveal(year)

# Keep the rendered animation in a variable so the file written below is
# explicitly this animation rather than whatever last_animation() holds.
anim_noc <- animate(
  pnoc,
  duration = 5, fps = 20, width = 600, height = 400,
  renderer = gifski_renderer()
)
anim_noc  # print the animation, as the bare animate() call did

anim_save("ratebyNoC.gif", animation = anim_noc, path = "../plot")
# plot_ly(rate_noc, x=~year, y=~`0`, name = "0 children", type = "scatter", mode = "line") %>%
# add_trace(y=~`1-2`, name = "1-2 children", type = "scatter", mode = "line") %>%
# add_trace(y=~`3+`, name = "3+ children", type = "scatter", mode = "line")
# rate_noc$frame = rate_noc$year
# plot_ly(rate_noc, x = ~year, y = ~rate, split = ~noc, frame = ~frame, type = 'scatter', mode = 'lines', line = list(simplyfy = F))
# animation_opts(frame = 100, transition = 0, redraw = FALSE) %>%
# animation_slider(hide = T) %>%
# animation_button(x = 1, xanchor = "right", y = 0, yanchor = "bottom")
# Row counts per (year, school_type, income band) on the NA-free data.
# Income bands use the fixed cut-offs: up to 20000 is 'low', 94500 and above
# is 'high', everything else is 'middle'.
n_school <- data_omitna %>%
  mutate(
    income = car::recode(
      anually_family_income,
      "lo:20000 = 'low';94500:hi='high';else = 'middle'"
    )
  ) %>%
  group_by(year, school_type, income) %>%
  # The result stays grouped by year and school_type.
  summarise(n = n())
## `summarise()` regrouping output by 'year', 'school_type' (override with `.groups` argument)
# pivot_wider(names_from =school_type, values_from = n)
# Interactive line chart of row counts by year, one colour per school type and
# one animation frame per income band.
# "lines" is the valid scatter mode name ("line" is not a recognised value).
# The data is ungrouped first because n_school is still grouped by year and
# school_type, and plotly appears to split line traces at dplyr group
# boundaries -- which would leave single-point segments here.
plot_ly(
  ungroup(n_school),
  x = ~year, y = ~n,
  color = ~school_type, frame = ~income,
  type = "scatter", mode = "lines"
)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# plot_ly(rate_school, x=~year, y=~`Private school`, name = "Private school", type = "scatter", mode = "line") %>%
# add_trace(y=~`Public school`, name = "Public school", type = "scatter", mode = "line") %>%
# add_trace(y=~`Not enrolled`, name = "Not enrolled", type = "scatter", mode = "line")